Data preparations

Construct data frame

load("XSTSF_production.RData")
source('functions.R')


# add manual sandhi labels
# Empty cells in the label sheet are read in as NA.
label_sandhi <- read.csv('raw_data/sandhi_label.csv', na.strings = '')

# Swap the existing sandhi_tone column for the manual labels, matching rows on
# diortri + ind_no; where no variant label was given, sandhi_tone_var falls
# back to sandhi_tone. The time column is renamed to normtime.
f0_all_pre_label <- f0_all_pre %>%
  select(-sandhi_tone) %>%
  left_join(
    select(label_sandhi, ind_no, sandhi_tone, sandhi_tone_var, diortri),
    by = c('diortri', 'ind_no')
  ) %>%
  mutate(sandhi_tone_var = coalesce(sandhi_tone_var, sandhi_tone)) %>%
  rename(normtime = time)

# get disyllabic citation data
# f0_di: rows with diortri == 'di'; f0_di_ct: those with focus_condition == 'ct'.
f0_di <- f0_all_pre_label %>%
  filter(diortri == 'di')

f0_di_ct <- f0_di %>%
  filter(focus_condition == 'ct') %>%
  # re-normalisation within each speaker:
  #   f0ref   = the speaker's mean f0 (NAs ignored)
  #   norm_f0 = z-score of log(f0)
  group_by(speaker) %>%
  mutate(
    f0ref = mean(f0, na.rm = TRUE),
    # scale() returns a one-column matrix; as.numeric() stores a plain numeric
    # vector instead of a matrix column carrying scaled:* attributes.
    norm_f0 = as.numeric(scale(log(f0)))
  ) %>%
  ungroup()

# get H- & L-register initial di citation data
# H set: mono_tone_1 begins with "H"; L set: mono_tone_1 begins with "L" or "R".
f0_di_ct_h <- filter(f0_di_ct, grepl('^H', mono_tone_1))
f0_di_ct_l <- filter(f0_di_ct, grepl('^[LR]', mono_tone_1))

# Print the definition of draw_by() for reference. It is not defined in this
# script (presumably it comes from functions.R — confirm).
draw_by
## function (dataframe, x, y) 
## {
##     p <- dataframe %>% ggplot(aes(x = normtime, y = norm_f0, 
##         group = interaction(syllable_no, ind_no), color = citation_no, 
##         linetype = citation_no, text = paste("speaker: ", speaker, 
##             "\ncitation tone: ", citation_tone, "\ncitation no:", 
##             citation_no, "\ntoken: ", token))) + geom_line() + 
##         {
##             if (missing(y)) {
##                 facet_wrap(as.formula(paste("~", x)), ncol = 2, 
##                   labeller = label_both)
##             }
##             else {
##                 facet_grid(as.formula(paste(y, "~", x)), labeller = label_value)
##             }
##         } + theme_bw() + theme(panel.spacing.y = unit(0.02, "cm", 
##         data = NULL), text = element_text(size = 10)) + ylim(-4, 
##         4)
##     p
## }
# Show the working directory; the relative paths used above
# ("XSTSF_production.RData", 'functions.R', 'raw_data/sandhi_label.csv')
# are resolved against it.
getwd()
## [1] "/Users/shiyibing/Desktop/PhD/XSTSF"

Initial data inspection

yinping-initial LC & MH

# From the H-register-initial set, keep rows whose syntax_iniTone starts with
# "L" or "M" and ends in "p".
f0_di_lcmh_hp <- filter(f0_di_ct_h, grepl("^[LM].*p$", syntax_iniTone))

# Interactive contour plot, one facet per speaker; the tooltip shows the custom
# text aesthetic and the x value.
f0_di_lcmh_hp %>%
  draw_by('speaker') %>%
  ggplotly(tooltip = c('text', 'x'))

yinshang-initial LC & MH

# From the H-register-initial set, keep rows whose syntax_iniTone starts with
# "L" or "M" and ends in "s".
f0_di_lcmh_hs <- filter(f0_di_ct_h, grepl("^[LM].*s$", syntax_iniTone))

# Interactive contour plot, one facet per speaker; the tooltip shows the custom
# text aesthetic and the x value.
f0_di_lcmh_hs %>%
  draw_by('speaker') %>%
  ggplotly(tooltip = c('text', 'x'))

yangping-initial LC & MH

# From the L-register-initial set, keep rows whose syntax_iniTone starts with
# "L" or "M" and ends in "p".
f0_di_lcmh_lp <- filter(f0_di_ct_l, grepl("^[LM].*p$", syntax_iniTone))

# Interactive contour plot, one facet per speaker; the tooltip shows the custom
# text aesthetic and the x value.
f0_di_lcmh_lp %>%
  draw_by('speaker') %>%
  ggplotly(tooltip = c('text', 'x'))

yangshang-initial LC & MH

# From the L-register-initial set, keep rows whose syntax_iniTone starts with
# "L" or "M" and ends in "s".
f0_di_lcmh_ls <- filter(f0_di_ct_l, grepl("^[LM].*s$", syntax_iniTone))

# Interactive contour plot, one facet per speaker; the tooltip shows the custom
# text aesthetic and the x value.
f0_di_lcmh_ls %>%
  draw_by('speaker') %>%
  ggplotly(tooltip = c('text', 'x'))

Perceptual analysis

H-register-initial LC & MH: average

# Stack the two H-register-initial subsets, then:
#   - relabel sandhi_tone 'HLLM' as 'HMML',
#   - add propdur = normtime (as integer) divided by 20,
#   - add groupvar = "<ind_no>_<syllable_no>" while keeping both source columns,
#   - drop rows that have no sandhi_tone label.
f0_di_lcmh <- rbind(f0_di_lcmh_hp, f0_di_lcmh_hs) %>%
  mutate(
    sandhi_tone = case_when(sandhi_tone == 'HLLM' ~ 'HMML',
                            .default = sandhi_tone),
    propdur = as.integer(normtime) / 20
  ) %>%
  unite('groupvar', ind_no, syllable_no, sep = '_', remove = FALSE) %>%
  filter(!is.na(sandhi_tone))

unique(f0_di_lcmh[['sandhi_tone']]) # check the labels
## [1] "HMML" "MHHL" "MMMH" "HHHH"
# Call p_cluster() on the labelled data, passing sandhi_tone as a bare column
# name. p_cluster() is not defined in this script (presumably it comes from
# functions.R and draws the contours grouped by the given column — confirm).
p_cluster(f0_di_lcmh, sandhi_tone)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

H-register-initial LC & MH: individual categories

# Same p_cluster() call with 'speaker' as a third argument (presumably this
# splits the plot by speaker — confirm against the helper's definition).
p_cluster(f0_di_lcmh, sandhi_tone, 'speaker')
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

k-means clustering analysis

Try k-means clustering on the whole disyllabic citation dataset.

# Rebuild f0_di_lcmh (overwriting the earlier object of the same name) in wide
# format: keep rows whose syntax_iniTone starts with "L" or "M", drop the
# columns not wanted in the wide table, then spread norm_f0 into one column
# per normtime value.
f0_di_lcmh <- f0_di_ct %>%
  filter(grepl("^[LM]", syntax_iniTone)) %>%
  select(-c(diortri, syllable_no, focus_no, f0)) %>%
  spread(key = normtime, value = norm_f0)

# Column positions of the time points named "1" and "20" in the wide table.
start <- which(names(f0_di_lcmh) == "1")
end <- which(names(f0_di_lcmh) == "20")

# Build the clustering object from the columns start..end, identified by
# ind_no; the same column positions are also passed as the time values.
f0_di_lcmh_cluster <- cld(
  f0_di_lcmh,
  idAll = f0_di_lcmh[["ind_no"]],
  timeInData = start:end,
  time = start:end
)

# Run k-means for every cluster count from 2 to 10.
# NOTE(review): presumably kml() starts from random initial conditions, so a
# set.seed() call beforehand would make the partitions reproducible — confirm.
kml(f0_di_lcmh_cluster, nbClusters = 2:10)
##  ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
# Plot the clustering result with trajectories coloured by cluster; the second
# argument (7) presumably selects the 7-cluster partition — confirm.
plot(f0_di_lcmh_cluster, 7, parTraj=parTRAJ(col="clusters"))